1 Rmd settings

Sys.setenv(LANG = "en") #English
knitr::opts_chunk$set(echo = TRUE)
rm(list = ls())

path <- getwd()

setwd(path)

# packages
pacman::p_load(tidyverse, plotly,readxl,scales, extrafont,PerformanceAnalytics, GGally, patchwork, ggpubr, ggrepel, stargazer, kableExtra,modelsummary)

# Font for windows and mac
if (stringr::str_detect(path, pattern="Users")){ 
  
   theme_set(theme_classic(base_size = 10, base_family = "HiraginoSans-W3"))  # For Mac OS

 } else{
  
theme_set(theme_classic(base_size = 10, base_family = "Arial")) # For Windows
   }    

2 Contents /全体の流れのメモ

  • Descriptive statistics / 記述統計
  • Correlation between two treatment variables / 処置変数間の相関
  • Correlation between the main treatment varialbe and suicod outcomes / 処置変数と自殺アウトカムの相関

3 Read data/分析用データの読み込み

df_analysis <- readr::read_csv("output/df_analysis.csv")
## Rows: 1551 Columns: 149
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr    (4): prefec_kanji, prefecture, prefec, prefec_kanji2
## dbl  (144): id, month, year, suicide_total, suicide_male, suicide_female, su...
## date   (1): date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

4 Descriptive statistics/記述統計表

4.1 Construct a dataset for descriptive stat table in the paper/論文用の記述統計のセットを抽出

df_analysis_desc_stat <- df_analysis %>%
  select(date, prefec_kanji, # date and prefecture
         unemploy_shock_diff2,  # main treatment
         job_seeker_total_shock, # alternative treatment
         
         suicide_rate,  # suicide
         suicide_rate_female,
         suicide_rate_male, 
         
         unemp_benefit_number_total, # unemployment benefit  
         unemp_benefit_number_female,
         unemp_benefit_number_male, 
         
         yoy_unemp_benefit_number_total,  # unemployment benefit(YOY) 2021Nov11 added
         yoy_unemp_benefit_number_female, #2021Nov11 added
         yoy_unemp_benefit_number_male,   #2021Nov11 added
         
         koguchi_number, # Emergency small-amount fund / 緊急小口資金
         sogo_number, # General support fund / 総合支援資金
         jukyo_number, # Housing Security Benefits / 住居確保給付金
         
         persons_receive, # Public Assistance recipients / 生活保護受給者数
         households_receive, # Public Assistance recipient households / 生活保護受給世帯数
         
         yoy_persons_receive,   #2021Nov11 追加
         yoy_households_receive, #2021Nov11 追加

         infection_rate_cumulative2020jun, # COVID-19 cumulative infection rate up to June 2020
         death_rate_cumulative2020jun, # COVID-19 cumulative death rate up to June 2020
         google_mobility_index_2020may, # Google Mobility index at the end of May 2020
         Population_per_1_km_2_of_inhabitable_area, # Population density, measured based on inhabitable area
         Secondary_industry_ratio, # secondary industry ratio
         Tertiary_industry_ratio, # tertiary (service) industry ratio
         Ratio_of_aged_population, # ratio of aged (65+) to working age (1? - 64)
         Total_population) # total population

4.2 Keep one month for cross-sectional variables/クロスセクション変数は1ヶ月分のみ

# 2021Sep30 

# Keep only one month data for cross-sectional treatment and control variables / 処置変数とコントロール変数は1月分だけ残す(目視チェック)
df_analysis_desc_stat <- df_analysis_desc_stat %>%
   dplyr::mutate(unemploy_shock_diff2 = case_when(date == "2019-01-01" ~ unemploy_shock_diff2)) %>%
   dplyr::mutate(job_seeker_total_shock = case_when(date == "2019-01-01" ~ job_seeker_total_shock)) %>%
   dplyr::mutate(google_mobility_index_2020may = case_when(date == "2019-01-01" ~ google_mobility_index_2020may)) %>%
   dplyr::mutate(infection_rate_cumulative2020jun = case_when(date == "2019-01-01" ~ infection_rate_cumulative2020jun)) %>%
   dplyr::mutate(death_rate_cumulative2020jun = case_when(date == "2019-01-01" ~ death_rate_cumulative2020jun)) %>%
   dplyr::mutate(Population_per_1_km_2_of_inhabitable_area = case_when(date == "2019-01-01" ~ Population_per_1_km_2_of_inhabitable_area)) %>%
   dplyr::mutate(Secondary_industry_ratio = case_when(date == "2019-01-01" ~ Secondary_industry_ratio)) %>%
   dplyr::mutate(Tertiary_industry_ratio = case_when(date == "2019-01-01" ~ Tertiary_industry_ratio)) %>%
   dplyr::mutate(Ratio_of_aged_population = case_when(date == "2019-01-01" ~ Ratio_of_aged_population))  %>%
   dplyr::mutate(Total_population = case_when(date == "2019-01-01" ~ Total_population))

4.3 Show desc stat in html/記述統計のhtml表示

# summary stat: skimr
#skimr::skim(df_analysis_desc_stat)

# summary stat: text
# 処置変数とコントロール変数の記述統計量(html表示用)

stargazer::stargazer(data.frame(df_analysis_desc_stat),
          type = "text",digits = 3,
          summary.stat = c("n", "mean","sd","median","min", "max"),
          covariate.labels = c("Employment shock (baseline)",
                               "Employment shock (full-time)",
                               "Suicide rate (total)",
                               "Suicide rate (female)", 
                               "Suicide rate (male)",
                               "Unemployment benefit (total)", 
                               "Unemployment benefit (female)", 
                               "Unemployment benefit (male)", 
                               "Unemployment bene. (YOY, total)", 
                               "Unemployment benefit (YOY, female)", 
                               "Unemployment benefit (YOY, male)", 
                               "Emergency S.A. Funds",
                               "General Support Funds",
                               "Housing Security Benefit",
                               "Public assistance (recipients)",
                               "Public assistance (households)",
                               "Public assistance (YOY, recipients)",
                               "Public assistance (YOY, households)",
                               "Cumulative infection rate",
                               "Cumulative death rate",
                               "Google Mobility index",
                               "Population density",
                               "Ratio of employees (secondary)",
                               "Ratio of employees (service)",
                               "Elderly dependency rate",
                               "Total population")) 
## 
## ==========================================================================================
## Statistic                             N     Mean    St. Dev.   Median     Min       Max   
## ------------------------------------------------------------------------------------------
## Employment shock (baseline)          47     0.247     0.418     0.162    -0.767    1.227  
## Employment shock (full-time)         47     0.080     0.082     0.109    -0.139    0.216  
## Suicide rate (total)                1,551   1.356     0.376     1.322    0.177     3.266  
## Suicide rate (female)               1,551   0.781     0.342     0.758    0.000     2.165  
## Suicide rate (male)                 1,551   1.971     0.626     1.896    0.000     4.686  
## Unemployment benefit (total)        1,551  326.385   61.237    319.203  202.109   526.635 
## Unemployment benefit (female)       1,551  375.744   73.201    368.638  225.504   607.705 
## Unemployment benefit (male)         1,551  273.122   51.882    264.760  176.699   454.545 
## Unemployment bene. (YOY, total)     1,551  12.251    38.010     3.680   -137.246  191.956 
## Unemployment benefit (YOY, female)  1,551   9.552    39.779     1.292   -175.598  206.215 
## Unemployment benefit (YOY, male)    1,551  15.196    38.510     5.725   -94.493   187.694 
## Emergency S.A. Funds                 893   28.505    55.847     0.697    0.000    567.447 
## General Support Funds                893   14.903    36.930     0.034    0.000    426.290 
## Housing Security Benefit             423    6.130     9.494     2.782    0.000    81.259  
## Public assistance (recipients)      1,551 1,398.269  666.371  1,303.267 332.386  3,248.442
## Public assistance (households)      1,551 1,103.915  514.651   993.016  289.867  2,516.684
## Public assistance (YOY, recipients) 1,551  -10.959   18.902    -9.456   -61.430   46.458  
## Public assistance (YOY, households) 1,551   3.012    11.851     2.311   -21.452   50.504  
## Cumulative infection rate            47     8.512     8.556     6.084    0.000    44.717  
## Cumulative death rate                47     0.433     0.633     0.123    0.000     2.373  
## Google Mobility index                47    -22.180    4.864    -22.016  -39.048   -12.871 
## Population density                   47   1,350.738 1,781.228  795.600  234.700  9,792.900
## Ratio of employees (secondary)       47     0.245     0.049     0.235    0.138     0.331  
## Ratio of employees (service)         47     0.660     0.039     0.665    0.601     0.735  
## Elderly dependency rate              47    53.502     7.508    53.900    35.000   70.100  
## Total population                     47    268.489   277.929   160.000   56.000  1,392.000
## ------------------------------------------------------------------------------------------

4.4 Save desc stat tex/記述統計のtex出力

# 2021Oct7
# Tretment, Outcome, Covariate
# modelsummary with kableExtra
# kableExtra::pack_rows
# kableExtra::add_footnote cannot be used (unsolved)

table_desc_stat <-  modelsummary::datasummary(
                          (`Employment shock (baseline)` = unemploy_shock_diff2) +
                          (`Employment shock (full-time)` = job_seeker_total_shock) +
                          (`Suicide rate (total)` = suicide_rate) +
                          (`Suicide rate (female)` = suicide_rate_female) +
                          (`Suicide rate (male)` = suicide_rate_male) +
                          (`Unemployment benefit (total)` = unemp_benefit_number_total) +
                          (`Unemployment benefit (female)` = unemp_benefit_number_female) +
                          (`Unemployment benefit (male)` = unemp_benefit_number_male) +
                          (`Unemployment benefit (total, yoy)` = yoy_unemp_benefit_number_total) +
                          (`Unemployment benefit (female, yoy)` = yoy_unemp_benefit_number_female) +
                          (`Unemployment benefit (male, yoy)` = yoy_unemp_benefit_number_male) +
                          (`Emergency Small Ammount Funds` = koguchi_number) +
                          (`General Support Funds` = sogo_number) +
                          (`Housing Security Benefit` = jukyo_number) +
                          (`Public assistance (recipients)` = persons_receive) +
                          (`Public assistance (households)` = households_receive) +
                          (`Public assistance (recipients, yoy)` = yoy_persons_receive) +
                          (`Public assistance (households, yoy)` = yoy_households_receive) +
                          (`Cumulative infection rate` = infection_rate_cumulative2020jun) +
                          (`Cumulative death rate` = death_rate_cumulative2020jun) +
                          (`Google Mobility index` = google_mobility_index_2020may) +
                          (`Population density` = Population_per_1_km_2_of_inhabitable_area) +
                          (`Ratio of employees (secondary)` = Secondary_industry_ratio) +
                          (`Ratio of employees (service)` = Tertiary_industry_ratio) +
                          (`Elderly dependency rate` = Ratio_of_aged_population) +
                          (`Total population` = Total_population) ~
                            N + Mean + (Std.Dev. = SD) + Min + Max, 
                          label = "tab:summary_stat",
                          title = "Summary Statistics",
                          data = df_analysis_desc_stat,
                          output = 'latex') %>% 
  kableExtra::pack_rows("Treatment",1,2) %>% 
  kableExtra::pack_rows("Outcome",3,18) %>% 
  kableExtra::pack_rows("Covariate",19,26) %>% 
  kableExtra::add_footnote(c("\\parbox[t]{\\textwidth}{Notes: The employment shock is a cross-section variable calculated based on equation \\eqref{eq:employment_shock}. All the outcome variables are calculated per 100,000 population. For the definition of each variable, see Table \\ref{tab:data_source} in Appendix \\ref{sec:background_info}. Outcome variables such as suicide rates, unemployment benefits, and Public assitance recipients are 21-months data (from January 2019 to September 2020) while the variables of Emergency Small Amount Funds, General Support Funds, and Housing Security Benefit are more restricted due to data limitation. ``yoy'' means year-on-year difference. \\\\
Sources: See Table \\ref{tab:data_source} in Appendix \\ref{sec:background_info}.}"),threeparttable = TRUE, notation = "none",escape = FALSE)
## Warning: To compile a LaTeX document with this table, the following commands must be placed in the document preamble:
## 
## \usepackage{booktabs}
## \usepackage{siunitx}
## \newcolumntype{d}{S[input-symbols = ()]}
## 
## To disable `siunitx` and prevent `modelsummary` from wrapping numeric entries in `\num{}`, call:
## 
## options("modelsummary_format_numeric_latex" = "plain")
## 
## This warning is displayed once per session.
cat(table_desc_stat, sep = '\n', file = "output/table_summary_stat.tex")

5 Treatment variation by prefecture/都道府県別の処置変数のグラフ

5.1 Extract one-month data/クロスセクションデータ抽出

# 1月分のデータ抽出
df_employ_shock <- df_analysis %>% filter(date == "2019-01-01")

5.2 Treatment variable: YOY of diff 2 of unemployment rate

  • (2020Q2 - 2019Q4) - (2019Q2 - 2018Q4)
  • (2020年4~6月の値 - 2019年10~12月の値)- (2019年4~6月の値 - 2018年10~12月の値)
g_unemploy_shock <- ggplot2::ggplot(df_employ_shock, aes(x = unemploy_shock_diff2, y =  reorder(prefec, unemploy_shock_diff2))) +
   geom_bar(stat = "identity") +
   theme(legend.position = 'none') +
   theme_light() +
   labs(title = "(a) Employment shock", x = "Employment shock measured by the unemployment rate",y = "") 
ggplotly(g_unemploy_shock)
#Extract data / データ抽出
df_unemploy_shock_diff2 <- df_employ_shock %>%
  select(prefec_kanji, unemploy_shock_diff2)

# save data as csv for check
write_excel_csv(df_unemploy_shock_diff2, "output/unemploy_shock_diff2.csv")

6 Correlation between two treatments/2変数の相関 by ggplot

6.1 Function for plotting/散布図グラフの関数

scatter_treat_outcome = function(dataset, treat_var, outcome_var, 
                                 point_size_var, main_title, x_title, y_title){
  
dataset %>%
    ggplot(aes(x = treat_var, y = outcome_var, size = point_size_var, label = prefec)) +
    geom_point(color = "black", shape = 21, fill = "blue1", alpha = 0.5) +
    geom_smooth(method = "lm", se = FALSE, color = "gray20", linetype = "dashed") +
    labs(title = main_title, x = x_title, y = y_title) +
    theme_classic()  +
    scale_size(range = c(2,12)) +
    theme(legend.position = 'none') +
    geom_vline(xintercept= 0, colour = "gray") +
    geom_hline(yintercept= 0, colour = "gray")
}

6.2 Scatter plot/処置変数間の散布図

6.2.1 Extract data/データ抽出

df_correlation <- df_analysis %>%
  dplyr::filter(date == "2020-01-01") %>%
  dplyr::select(unemploy_shock_diff2, job_seeker_total_shock, population_total, prefec)

6.2.2 Scatter plot/散布図

  • unemployment-rate shock(main) and full-time unemployment-rate shock(alternative)
  • 失業率ショックと有効求職者割合ショック
graph_unemploy_job_seeker_plot <- scatter_treat_outcome(dataset = df_correlation, 
                      treat_var = df_correlation$unemploy_shock_diff2,
                      outcome_var = df_correlation$job_seeker_total_shock,
                      point_size_var = df_correlation$population_total,
                      main_title = "",
                      x_title = "Baseline employment shock",
                      y_title = "''Full-time'' employment shock") +
                      geom_text_repel(size=3)

ggplotly(graph_unemploy_job_seeker_plot)
## `geom_smooth()` using formula 'y ~ x'
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
# save as PDF
ggsave(file = "output/graph_unemploy_job_seeker_plot.pdf", plot = graph_unemploy_job_seeker_plot, 
       dpi = 100, width = 6, height = 5)   
## `geom_smooth()` using formula 'y ~ x'

6.2.3 Check regression

reg1 <- estimatr::lm_robust(job_seeker_total_shock ~ unemploy_shock_diff2, 
                            data = df_correlation, se_type = "stata")

texreg::screenreg(list(reg1), include.ci = FALSE, digits = 3)
## 
## ================================
##                       Model 1   
## --------------------------------
## (Intercept)            0.071 ***
##                       (0.016)   
## unemploy_shock_diff2   0.039    
##                       (0.026)   
## --------------------------------
## R^2                    0.040    
## Adj. R^2               0.019    
## Num. obs.             47        
## RMSE                   0.082    
## ================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

7 Correlation between treatment and outcomes/処置変数(失業率ショック, YOY diff2)とアウトカム変数の散布図

  • treatment = YOY of diff2 of unemployment rate

7.1 Outcome in July 2020/2020年7月のアウトカム

# 2020.7月分のデータ抽出 for 自殺率YOY)
df_analysis_2020jul <- df_analysis %>% filter(date == "2020-07-01")

7.2 Scatter plots

  • YOY of suicide rate in July 2020(Total, female, male)
  • 自殺 7月YOY(全体、女性、男性)
# 2021Aug17
# Total / 全体
graph_unemp_suicide_rate_total <- scatter_treat_outcome(dataset = df_analysis_2020jul, 
                      treat_var = df_analysis_2020jul$unemploy_shock_diff2,
                      outcome_var = df_analysis_2020jul$yoy_suicide_rate,
                      point_size_var = df_analysis_2020jul$population_total,
                      main_title = "(b) Total suicides",
                      x_title = "Employment shock measured by the unemployment rate",
                      y_title = "Change in the suicide rate") +
                      scale_y_continuous(limits = c(-1.5, 1.5),
                                         breaks=c(-1.5, -1.0, -0.5, 0.0, 
                                                  0.5, 1.0, 1.5))

ggplotly(graph_unemp_suicide_rate_total)
## `geom_smooth()` using formula 'y ~ x'
# Female / 女性
graph_unemp_suicide_rate_female <- scatter_treat_outcome(dataset = df_analysis_2020jul, 
                      treat_var = df_analysis_2020jul$unemploy_shock_diff2,
                      outcome_var = df_analysis_2020jul$yoy_suicide_rate_female,
                      point_size_var = df_analysis_2020jul$population_total,
                      main_title = "(c) Female suicides",
                      x_title = "Employment shock measured by the unemployment rate",
                      y_title = "Change in the suicide rate")+
                      scale_y_continuous(limits = c(-1.5, 1.5),
                                         breaks=c(-1.5, -1.0, -0.5, 0.0, 
                                                  0.5, 1.0, 1.5))


ggplotly(graph_unemp_suicide_rate_female)
## `geom_smooth()` using formula 'y ~ x'
# Male / 男性
graph_unemp_suicide_rate_male <- scatter_treat_outcome(dataset = df_analysis_2020jul, 
                      treat_var = df_analysis_2020jul$unemploy_shock_diff2,
                      outcome_var = df_analysis_2020jul$yoy_suicide_rate_male,
                      point_size_var = df_analysis_2020jul$population_total,
                      main_title = "(d) Male suicides",
                      x_title = "Employment shock measured by the unemployment rate",
                      y_title = "Change in the suicide rate") +
                      scale_y_continuous(limits = c(-1.5, 1.5),
                                         breaks=c(-1.5, -1.0, -0.5, 0.0, 
                                                  0.5, 1.0, 1.5))

ggplotly(graph_unemp_suicide_rate_male)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
# Bind and save scatter plots / 散布図の統合と保存

## bind and save graphs 
graph_unemp_suicide_bind <- (g_unemploy_shock + graph_unemp_suicide_rate_total) / (graph_unemp_suicide_rate_female +  graph_unemp_suicide_rate_male)

graph_unemp_suicide_bind 
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(file = "output/graph_unemp_suicide_YOY_Jul.pdf", plot = graph_unemp_suicide_bind, 
       dpi = 100, width = 12, height = 12)     
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).